* REPLICATION OF MAZUMDER 2018 (Stata version 14)
* Michael Biggs (University of Oxford)
* Christopher Barrie (University of Oxford)


* INPUT: PUBLICLY AVAILABLE DATA (not supplied)
* (1) Mazumder's dataset from Harvard dataverse
* https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/WKJJ3Z
*	Input/Mazumder/abs-jop-cces-white-countydata.csv
*	Input/Mazumder/civil_rights_protest_final.csv
* (2) CCES survey data from Harvard dataverse
* https://dataverse.harvard.edu/dataverse/cces
* 	Input/CCES/cces_2006_common.dta
* 	Input/CCES/cces_2008_common.dta
* 	Input/CCES/cces09_cmn_output_2.dta
* 	Input/CCES/cces_2010_common_validated.dta
* 	Input/CCES/CCES11_Common_OUTPUT.dta
* (3) 1960 Census from ICPSR
* https://www.icpsr.umich.edu/icpsrweb/
* 	Input/ICPSR_02896/DS0074/02896-0074-Data.dta
*	Input/ICPSR_00003/DS0019/00003-0019-Data.txt
* (4) Dynamics of Collective Action in the United States, 1960–1995
* https://www.stanford.edu/group/collectiveaction/
* 	Input/DoCA/final_data_v10.dta
* (5) *U.S. Board on Geographic Names, 2014, 
* http://geonames.usgs.gov
*   Input/USGS/POP_PLACES_20140802.txt


* INPUT: ANCILLARY FILES (supplied)
* (3b) Data dictionary for ICPSR 3  
*	Files/ICPSR3_dict.do
* (4) 1960 Census from our own transcription
*   Files/1960Census.xlsx

* OUTPUT (supplied)
* (1) Stata datasets
*	Output/counties.dta
*	Output/respondents.dta
* (2) Log files
*	Output/log.txt
*	Output/results.txt

* OUTPUT (not supplied)
* (3) Graphs
*	Output/Figure_1_affhighres.png
*	Output/Figure_A1_pvalues
*	Output/Figure_A2_reshighres.png
*	Output/Figure_A3_demhighres.png
* (4) Tables 
*	Output/ReplicationTables.xls
*	Output/tryout_aff.xls		


version 14

if "`user'"=="CB" 	cd "/Users/christopherbarrie/Dropbox/AJPS_response/Mazumder_replication_files"
else 				cd "/Users/michael/Dropbox/Mazumder_replication_files"

*global longtime ""
global longtime "Y"

capture: program drop getaic
program define getaic, rclass
quietly estat ic
tempname aic
scalar `aic' = el(r(S), 1, 5)
di "AIC " %-7.0fc (`aic')
return scalar aic = `aic'
end

* This program would send output to Excel worksheet for table; if it is not installed, don't worry
capture: niceoutputs
if _rc==199 {
	program define niceoutputs
	syntax using/, sheet(name) [fullnames coefficient odds ic aicestat roc roclarge effects]
	getaic
	end
	}

capture: file close r
file open r using Output/results.txt, write text replace
capture: log close
log using Output/log.txt, text replace


************************************************************************************************************************************
************************************************************************************************************************************
*** PROCESS NYT EVENTS
************************************************************************************************************************************
************************************************************************************************************************************

use Input/DoCA/final_data_v10, clear
save Temp/nyt_events, replace

* African American Civil Rights
gen byte CR_protest = ///
	(claim1>=1500 & claim1<1600 & val1==1) | ///
	(claim2>=1500 & claim2<1600 & val2==1) | ///
	(claim3>=1500 & claim3<1600 & val3==1) | ///
	(claim4>=1500 & claim4<1600 & val4==1)
drop if ! CR_protest

* Date
gen evdate=mdy(evmm,evdd,evyy)
gen rptdate=mdy(rptmm,rptdd,rptyy)
format evdate rptdate %td
sort evdate
list evdate in 1, clean noobs
drop if evdate < mdy(1, 1, 1960) | evdate > mdy(8, 6, 1965)
* Mazumder: "stop the data collection on protests in 1965 with the passage of the Voting Rights Act"
gen byte CR_protest_spring60 = CR_protest==1 & evdate>=mdy(2, 1, 1960) & evdate<=mdy(4, 14, 1960) & evdate!=. 
tab CR_protest_spring60 if CR_protest==1

* Participants
gen byte part_missing = particex==.
count if particex<1
count if particex==.
gen long partica=particex
replace partica=5 if partices==1
replace partica=30 if partices==2
replace partica=75 if partices==3
replace partica=300 if partices==4      //hundreds
replace partica=3000 if partices==5  	//thousands
replace partica=30000 if partices==6	//tens of thousands
count if partica==.
gsort - partica
list partica city1 state1 evdate if partica>=10000, clean noobs

* Deal with multiple cities
tab cityn staten 
* two cities in one state => duplicate
list cityn city1-city2 state1-state4 partica if cityn>1 & staten==1, clean noobs
expand 2 if cityn>1 & staten==1, gen(duplicated)
replace city1=city2 if duplicated
replace partica = partica / cityn if cityn>1 & staten==1
* cities in multiple states can't be located as cities don't match states => exclude 7 observations 
list cityn city1-city2 state1-state4 partica if cityn>1 & staten>1, noobs sepby(staten)
replace city1 = "EXCLUDE" if cityn>1 & staten>1

* Clean up geography
gen state_abb = state1
gen city=proper(city1)
tab city if state_a==""
replace state_abb = "DC" if city=="Washington" & state_a==""
drop if state_abb == "AK"  // Alaska and is not in Mazumder's analysis
* Correct typographical errors
replace city = "Gadsden" 	if city=="Gadsen" & state_a=="AL"
replace city = "Notasulga" 	if city=="Natasulga" & state_a=="AL"
replace state_abb = "NY" 		if city=="New York" & state_a=="CA"
replace city = "Stamford" 	if city=="Stanford" & state_a=="CT"
replace city = "La Grange" 	if city=="Lagrange" & state_a=="GA"
replace city =	"East Saint Louis"	if (city =="Saint Louis" | city=="St. Louis") & state_a== "IL"
replace city =	"Wenona"	if city == 	"Winona" & state_abb ==	"IL"
replace state_a = "FL" if title=="FLORIDA RIOTS HELD SPUR TO NEGRO UNITY"
replace city =	"Plaquemine"	if city == 	"Plaqumine"	& state_abb ==	"LA"
replace city =	"Bogalusa"	if city == 	"Bogalsua"	& state_abb ==	"LA"
replace state_abb =	"MS"	if city=="Hattiesburg" & state_abb ==	"MI" 
replace state_abb =	"MS"	if city=="Moss Point" & state_abb ==	"MI" 
replace state_abb =	"MS"	if city=="Mccomb" & state_abb ==	"MI" 
* note also another event in Miss. coded as MI! (no city identified)
replace city =	"Havre de Grace" if city =="Havre De Grace"	& state_abb ==	"MD"
replace city =	"Fannin" if city =="Fanin"	& state_abb =="MS"
replace city =	"McComb" if city =="Mccomb"	& state_abb =="MS"
replace city = 	"Asheville"	if city == "Ashville" & state_abb ==	"NC"
replace city = "Asbury Park" if city =="Ashbury Park"	& state_abb =="NJ"
replace state_abb = "NY" if city =="Hempstead"	& state_abb =="NJ"
replace city = "Keansburg" if city =="Keansberg"	& state_abb =="NJ"
replace city = "Millburn" if city =="Milburn"	& state_abb =="NJ"
replace city = "Paterson" if city =="Patterson"	& state_abb =="NJ"
replace state_abb = "CT" if city =="New Haven"	& state_abb =="NJ"
replace state_abb = "NJ" if city =="Jersey City"	& state_abb =="NY"
replace city = "Amityville" if strpos(where, "AMITYVILLE") > 0 & city =="Long Island" & state_abb =="NY"
replace city = "Hicksville" if strpos(where, "HICKSVILLE") > 0 & city =="Long Island" & state_abb =="NY"
replace city = "Lakeview" if strpos(where, "LAKEVIEW") > 0 & city =="Long Island" & state_abb =="NY"
replace city = "Lynbrook" if strpos(where, "LYNBROOK") > 0 & city =="Long Island" & state_abb =="NY"
replace city = "Malverne" if strpos(where, "MALVERNE") > 0 & city =="Long Island" & state_abb =="NY"
replace city = "Mineola" if strpos(where, "MINEOLA") > 0 & city =="Long Island" & state_abb =="NY"
replace city = "Manhasset" if strpos(where, "MANHASSET") > 0 & city =="Long Island" & state_abb =="NY"
replace city = "West Hempstead" if strpos(where, "WEST HEMPSTEAD") > 0 & city =="Long Island" & state_abb =="NY"
replace city = "Saratoga Springs" if city =="Sarasota"	& state_abb =="NY"
replace city = "Rock Hill" if city =="Rockhill"	& state_abb =="SC"
replace city = "Newport News" if city=="Newport" & state_a=="VA"
replace city = "Richmond" if city =="Rickmond"	& state_abb =="VA"

rename state_abb state_abb
rename partica CR_participants
save Temp/nyt_events, replace

******************
* CITIES
******************

insheet using "Input/USGS/POP_PLACES_20140802.txt" , delimiter("|") clear names

gen state_abb = state_alpha
gen city = feature_name
gen fips = state_num *1000 + county_num

replace county_name = cond(  substr(county_name,-6,6)!="(city)", county_name, substr(county_name,1,length(county_name)-6) + "city")

drop if state_abb=="AK"
by state_abb city, sort: egen city_duplicates = count(fips)
tab city_duplicates
drop if city_duplicates > 1
duplicates drop state_abb city county_name, force
keep state_abb city county_name fips

merge 1:m state_abb city using Temp/nyt_events

* Places where the city name is actually the county name
replace county_name =	"Hillsborough"	if city == 	"Hillsborough"	& state_abb ==	"FL"
replace county_name =	"Lee"	if city == 	"Lee County"	& state_abb ==	"GA"
replace county_name =	"Amite"	if city == 	"Amite County"	& state_abb ==	"MS"
replace county_name =	"Fayette"	if city == 	"Fayette"	& state_abb ==	"TN"
replace county_name =	"Haywood"	if city == 	"Haywood"	& state_abb ==	"TN"
replace county_name =	"Nassau"	if where ==	"NASSAU COUNTY SCHOOL DISTRICTS"	& state_abb ==	"NY"

* Place where the city name is not unique within state
replace county_name =	"Lee"	if city == "Leesburg"	& state_abb == "GA"
replace county_name =	"Dade"	if city == "Wildwood"	& state_abb == "GA"
replace county_name =	"Jefferson"	if city == "Mount Vernon"	& state_abb == "IL"
replace county_name =	"Ouachita"	if city == "Monroe"	& state_abb ==	"LA"
replace county_name =	"Oakland"	if city == "Oak Park"	& state_abb ==	"MI"
replace county_name =	"Bolivar"	if city == 	"Cleveland"	& state_abb ==	"MS"
replace county_name =	"Leflore"	if city == 	"Greenwood"	& state_abb ==	"MS"
replace county_name =	"Lafayette"	if city == 	"Oxford"	& state_abb ==	"MS"
replace county_name =	"Newton"	if city == 	"Union"	& state_abb ==	"MS"
replace county_name =	"Orange"	if city == "Chapel Hill"	& state_abb == "NC"
replace county_name =	"Cabarrus"	if city == "Concord"	& state_abb == "NC"
replace county_name =	"Pasquotank"	if city == "Elizabeth City"	& state_abb == "NC"
replace county_name =	"Randolph"	if city == "Trinity"	& state_abb == "NC"
replace county_name =	"Somerset"	if city == "Franklin"	& state_abb == "NJ"
replace county_name =	"Morris"	if city == "Morristown"	& state_abb == "NJ"
replace county_name =	"Union"	if city == "Union"	& state_abb == "NJ"
replace county_name =	"Nassau"	if city == "Brooklyn"	& state_abb == "NY"
replace county_name =	"Kings"	if city == "Lakeview"	& state_abb == "NY"
replace county_name =	"Westchester"	if city == "Tarrytown"	& state_abb == "NY"
replace county_name =	"Dauphin"	if city == "Harrisburg"	& state_abb == "PA"
replace county_name =	"Allendale"	if city == "Allendale"	& state_abb == "SC"
replace county_name =	"York"	if city == "Rock Hill"	& state_abb == "SC"
replace county_name =	"Alexandria city"	if city == "Alexandria"	& state_abb == "VA"
replace county_name =	"Arlington"	if city == "Arlington"	& state_abb == "VA"
replace county_name =	"Hopewell"	if city == 	"Hopewell"	& state_abb ==	"VA"
replace county_name =	"Petersburg city"	if city == 	"Petersburg"	& state_abb ==	"VA"
replace county_name =	"Sussex"	if city == "Sussex"	& state_abb == "VA"

drop if _merge==1
drop _merge

gen byte cannotmatch = county_name==""
list where title state_abb city if cannotmatch, noobs clean

file write r "****** NYT protest events" _n
file write r "Cannot geographically match: " _n
summ cannotmatch
file write r %3.1f (r(mean) * 100) "% of events" _n
quietly summ CR_participants
local denom = r(mean)*r(N)
quietly summ CR_participants if cannotmatch
file write r %3.1f (r(mean)*r(N) / `denom' * 100) "% of participants" _n
drop if cannotmatch

collapse (sum) CR_protest* CR_participants, by(state_abb county_name)

corr CR_protest CR_participants
rename CR_protest CR_eventcount 
label var CR_eventcount "Number of Civil Rights protest events reported by NYT"
label var CR_participants "Total participants in Civil Rights protest events reported by NYT"
save Temp/nyt_events, replace


************************************************************************************************************************************
************************************************************************************************************************************
*** PROCESS COUNTY DATA
************************************************************************************************************************************
************************************************************************************************************************************

* Start with Mazumder's dataset
insheet using "Input/Mazumder/abs-jop-cces-white-countydata.csv", clear
local usefulvars "wbincratio2014 wbincratio00 pslave1860 thurmond48 longitude latitude totpop50 pdem1932-pdem1964 inflow outflow totpop00"
foreach var of varlist `usefulvars' {
	destring `var', replace force
	}	
keep fips stateabb county_name `usefulvars'  affirm resent dem samplesize* 
duplicates report fips
rename stateabb state_abb
replace county_name = county_name + " city" if fips==51620 | fips==51760  | fips==51770 
drop if state_abb=="HI" | state_abb=="AK"
save Output/counties, replace

insheet using "Input/Mazumder/civil_rights_protest_final.csv", clear
gen byte inMazumder = 1
merge m:1 fips using Output/counties
drop _merge
replace county_name = name if county_name==""
drop name
list state_abb county_name if inMazumder!=1, noobs sepby(state_abb)
save Output/counties, replace

clonevar samplesize_aff = samplesize
clonevar samplesize_dem = samplesize
rename samplesizeres samplesize_res
rename avgdemvsharepre avg_dem_vshare_pre
rename log_protest_event ln_protest_event
foreach var in medincome pctunemp pctoccupiedhousing medschoolyrs resent {
	destring `var', force replace 
	}

* Replicate Mazumder Table 1
gen byte mazsample = 0
file write r _n "****** MAZUMDER REPLICATION" _n
foreach dv in res aff dem {
	reg `dv' protest_indicator pctblack pcturban logtotpop1960 medincome avg_dem_vshare_pre i.state ///
		[aweight = samplesize_`dv'], vce(hc2) 
	file write r "** `dv'" _n
	file write r "protest effect: " _tab %4.3f (_b[protest_indicator]) _n
	file write r "s.e.: " _tab %4.3f (_se[protest_indicator])   _n
	file write r "N = " _tab (e(N))  _n
	file write r "R2 = " _tab %4.3f (e(r2)) _n _n
	quietly replace mazsample = 1 if mazsample==0 & e(sample)
	}

* Replicate Mazumder Appendix Table 3, model 1 & 6
local model1controls "c.pctblack##c.pctblack pcturban medincome avg_dem_vshare_pre"
reg res protest_indicator `model1controls' i.state [aweight = samplesize] if mazsample, vce(hc2)
reg res protest_indicator `model1controls' pctlfag pctunemp medianage medschoolyrs pctoccupiedhousing i.state [aweight = samplesize] if mazsample, vce(hc2)

* Now include the missing variable "from the main text" and use correct weighting
reg res protest_indicator `model1controls' logtotpop1960 i.state [aweight = samplesize_res] if mazsample, vce(hc2)
reg res protest_indicator `model1controls' logtotpop1960 pctlfag pctunemp medianage medschoolyrs pctoccupiedhousing i.state [aweight = samplesize_res] if mazsample, vce(hc2)
file write r "Corrected Model 6 in Appendix A4, including total population" _n 
test protest_indicator
file write r "p-value of protest: " %4.2f (r(p)) _n
	
* Repeat for affirmative action
reg aff protest_indicator `model1controls' logtotpop1960 pctlfag pctunemp medianage medschoolyrs pctoccupiedhousing i.state [aweight = samplesize] if mazsample, vce(hc2)

gen pdem32to60 = (pdem1932 + pdem1936 + pdem1940 + pdem1944 + pdem1948 + pdem1952 + pdem1956 + pdem1960) / 8
corr pdem32to60 avg_dem_vshare_pre
summ pdem32to60 avg_dem_vshare_pre if pdem32to60!=. & avg_dem_vshare_pre!=.
duplicates tag fips, gen(duplicate)
gen anomaly = abs(avg_dem_vshare_pre - pdem32to60) if duplicate
gsort -duplicate -anomaly
list state_abb county_name totpop60 pctblack pcturban logtotpop1960 medincome avg_dem_vshare_pre pdem32to60 if duplicate, clean
*two Montgomery VA listed - differs on avgdemvoteshare
drop in 1
drop anomaly
	
* Region
gen byte southconfed = state_abb=="AL"|state_abb=="AR"|state_abb=="FL"|state_abb=="GA"| ///
	state_abb=="LA"|state_abb=="MS"|state_abb=="NC"|state_abb=="SC"|state_abb=="TN"| ///
	state_abb=="TX"|state_abb=="VA"

* Create distance to NY var. using coordinates for New York County (where NYT Offices located, i.e., Manhattan)
gen latny=40.73521638288
gen lonny=-74.00547321265
geodist latitude longitude latny lonny, gen(nydist)
gen rootnydist = sqrt(nydist)
gen nydist2 = nydist^2
gen latitude2 = latitude^2
gen longitude2 = longitude^2
save, replace
	
* 1960 Census: number of college students and other variables
use "Input/ICPSR_02896/DS0074/02896-0074-Data.dta", clear
rename var31 collegestudents60
rename var3 pop60 
rename var6 pcturban60
rename var26 medianschoolyears60
rename var22 medincome60
rename var35 pctunemp60
rename var34 totalcivilianlaborforce
rename var37 totalcivilianlaborforceemployed
rename var38 laborforce_agriculture
rename var51 totalhousingunits
rename var56 occupiedhousingunits
rename var58 over1perroompcthousing1960
rename var60 owneroccupiedhousingunits
replace pctunemp60 = 0 if totalcivilianlaborforce==totalcivilianlaborforceemployed ///
	& totalcivilianlaborforce!=.
gen agpctemployed60 = laborforce_agriculture / totalcivilianlaborforceemployed * 100
gen agpctlaborforce60 = laborforce_agriculture / totalcivilianlaborforce * 100
gen occupiedpcthousing60 = occupiedhousingunits / totalhousingunits * 100
gen ownerpcthousing60 = owneroccupiedhousingunits / totalhousingunits * 100
rename name countyname_icpsr
rename state state_icpsr
keep if level==1 & state_icpsr!=81 & state_icpsr!=82
keep fips collegestudents60 pop60 pcturban medianschoolyears60 medincome60 pctunemp60 agpct* ///
	 occupiedpcthousing60 ownerpcthousing60 over1perroompcthousing1960 countyname_icpsr state_icpsr
duplicates tag state_icpsr countyname_icpsr, gen(duplicate)
list if duplicate, clean noobs
* clean up D.C.
drop if fips==11000
replace state_icpsr = 55 if state_icpsr==98
replace countyname_icpsr = subinstr(countyname_icpsr," ","",.)
save Temp/temp, replace

* 1960 Census: black population
infile using Files/ICPSR3_dict.do, clear
keep if level=="C" & state!=81 & state!=82
gen pop_check = males + females
gen blackpop_icpsr = blackmales + blackfemales
duplicates list state_icpsr countyname_icpsr
merge 1:1 state_icpsr countyname_icpsr using Temp/temp.dta
list state_icpsr countyname_icpsr pop_check pop60 if pop_check != pop60 & _merge==3, clean noobs
drop pop_check
sort countyname_icpsr
list state_icpsr countyname_icpsr _merge if _merge!=3, sepby(state_icpsr) noobs
list countyname_icpsr fips _merge if state_icpsr==40, clean noobs 
* 2 cities for Virginia are suspicious! GO BACK TO THIS
list countyname_icpsr blackpop pop60 if blackpop==-2, clean noobs
replace blackpop= 411737 if countyname_icpsr=="DISTOFCOLUMBIA"
* Negro Population, by County, 1960 and 1950 p.2
gen pctblack60 = blackpop/pop60*100
gen ln_pop60 = ln(pop60)
drop if fips==.
drop _merge
clonevar countyname_icpsrmatch = countyname_icpsr
gen x = strpos(countyname_icpsr, "/") 
replace countyname_icpsrmatch = substr(countyname_icpsrmatch, 1, x-1) if x!=0
drop x
label define state_icpsr 1 "CT" 2 "ME" 3 "MA" 4 "NH" 5 "RI" 6 "VT" 11 "DE" 12 "NJ" 13 "NY" 14 "PA" 21 "IL" 22 "IN" 23 "MI" 24 "OH" 25 "WI" 31 "IA" 32 "KS" 33 "MN" 34 "MO" 35 "NE" 36 "ND" 37 "SD" 40 "VA" 41 "AL" 42 "AR" 43 "FL" 44 "GA" 45 "LA" 46 "MS" 47 "NC" 48 "SC" 49 "TX" 51 "KY" 52 "MD" 53 "OK" 54 "TN" 55 "DC" 56 "WV" 61 "AZ" 62 "CO" 63 "ID" 64 "MT" 65 "NV" 66 "NM" 67 "UT" 68 "WY" 71 "CA" 72 "OR" 73 "WA" 81 "AK" 82 "HI"
label values state_icpsr state_icpsr  
decode state_icpsr, gen(state_abb)
replace countyname_icpsrmatch = "VERMILION" if countyname_icpsrmatch=="VERMILLION" & state_abb=="LA"
save Temp/temp, replace

* 1960 Census: college education
import excel using Files/1960Census, clear sheet(Transpose) first
drop if state ==""
twoway scatter malecollege4yearsormore femalecollege4yearsormor 
reg Yearsschoolcompletedmale Yearsschoolcompletedfemale
predict p
gen residual = (Yearsschoolcompletedmale - p) / Yearsschoolcompletedmale

twoway ///
	(scatter Yearsschoolcompletedmale Yearsschoolcompletedfemale) ///
	(scatter Yearsschoolcompletedmale Yearsschoolcompletedfemale if residual>.5 | residual<-5, mlabel(county_name)), ///
	xscale(log) yscale(log) xlabel(10 100 1000 10000 100000) ylabel(10 100 1000 10000 100000) legend(off)
list county_name state  malecollege4yearsormore femalecollege4yearsormor if residual>.5 | residual<-5, clean noobs
gen college60 = malecollege4yearsormore + femalecollege4yearsormor
gen schoolyearsdenom60 = Yearsschoolcompletedmale + Yearsschoolcompletedfemale
gen pctcol60 = college60 / schoolyearsdenom60 * 100
summ pctcol60, det
gen pctcol602 = pctcol60^2
gen countyname_icpsrmatch = substr(    upper(   subinstr(  subinstr( subinstr(county_name,"'","",.) ,".","",.)  ," ","",.)   )    , 1, 16)
replace countyname_icpsrmatch = "STJOHNTHEBAPTI" if countyname_icpsrmatch=="STJOHNTHEBAPTIST" 
keep state countyname_icpsrmatch pctcol60* schoolyearsdenom60
duplicates list state_abb countyname_icpsr
merge 1:1 state_abb countyname_icpsrmatch using Temp/temp
sort state_abb countyname_icpsrmatch
list state_abb countyname_icpsrmatch pop60 schoolyearsdenom60 if _merge!=3, sepby(state_abb) noobs
drop _merge

* Merge
merge 1:1 fips using Output/counties
list state_abb county_name  totpop50 if _merge==2, sepby(state_abb) noobs
* La Paz, AZ—from Yuma 
* Carson city, NV—should be successor to Ormsby
* Chesapeake, VA 
* Franklin city, VA
* Manassas, VA
* Poquoson, VA
* Salem, VA 
list countyname_icpsr _merge if state_icpsr==65, clean noobs
* Cibola, NM—from Valencia
* Menominee, WI—Indian reservation
drop if _merge==1
sort state_abb county_name
list state_abb county_name samplesize if state_abb=="MO" | state_abb=="MD", sepby(state_abb) noobs
* St Louis and Baltimore cities are separate from counties
drop _merge countyname_icpsr
gen collegestudents602 = collegestudents60^2
gen ln_collegestudents60 = ln( cond(collegestudents60==0, 1, collegestudents60) )
gen sqrt_collegestudents60 = sqrt(collegestudents60)
gen pctcollegestudents60 = collegestudents60/pop60
corr ln_collegestudents60 pctcollegestudents60
file write r _n "Correlations in 1960 (Mazumder's sample)" _n
corr medianschoolyears60 pctcol60 if mazsample
file write r "Median school years & % college degree" _tab  %4.2f (el(r(C),2,1)) _n
corr medianschoolyears60 sqrt_collegestudents60 if mazsample
file write r "Median school years & square root of number of students" _tab  %4.2f (el(r(C),2,1)) _n
save Output/counties, replace

* NYT protest
replace county_name = "DeKalb" if county_name=="De Kalb" & (state_abb=="AL" | state_abb=="GA")
merge 1:1 state_abb county_name using Temp/nyt_events
list state_abb county_name if _merge==2, noobs sepby(state_abb)
drop if _merge==2
replace CR_eventcount = 0 if CR_eventcount==.
replace CR_participants = 0 if CR_participants==.
gen byte CR_protest = CR_eventcount>0
tab protest_indicator CR_protest  if mazsample
maketotal CR_eventcount if mazsample 
maketotal protest_event if mazsample 
gen lninc_CR_eventcount = ln(CR_eventcount+1)
gen lninc_CR_participants = ln(CR_participants+1)
gen CR_eventcount_pc = CR_eventcount / pop60
gen CR_participants_pc = CR_participants / pop60
gsort - CR_eventcount_pc
list county_name state_abb CR_eventcount CR_eventcount_pc in 1/10, clean noobs
gsort - CR_participants_pc
list county_name state_abb CR_participants CR_participants_pc CR_eventcount in 1/10, clean noobs
drop _merge
save, replace


************************************************************************************************************************************
************************************************************************************************************************************
**** ECOLOGICAL ANALYSIS
************************************************************************************************************************************
************************************************************************************************************************************

*********************************
* Set up Mazumder replication	*
*********************************

global countycontrols		"pctblack pcturban logtotpop1960 medincome avg_dem_vshare_pre"
global countycontrols_nogeo 	"$countycontrols pctlfag pctunemp60 medianage medschoolyrs pctoccupiedhousing" // 
global countycontrols_full 	"$countycontrols_nogeo latitude latitude2 longitude longitude2" // nydist nydist2
global countycontrols_noscale "$countycontrols_nogeo latitude_unscaled latitude_unscaled2 longitude_unscaled longitude_unscaled2"
global pctcolvar			"pctcol60"
global pctcolvars		"$pctcolvar ${pctcolvar}2"
global studentvar_raw	"collegestudents60"
global studentvar		"sqrt_$studentvar_raw"
global studentvars		"$studentvar"
global allaggvars 			"$countycontrols_full $pctcolvars $studentvars collegestudents60 lninc_CR_eventcount lninc_CR_participants"

foreach g in countycontrols countycontrols_nogeo countycontrols_full pctcolvar pctcolvars studentvar studentvars allaggvars  {
	global Z`g' = ""
*	di "`g'"
	foreach v of varlist $`g' {
*		di "   `v'"
		global Z`g' = "${Z`g'} Z`v'"
		}
	di "Z`g': ${Z`g'}"
	}

* Tidying
tab mazsample
list state_abb county_name if mazsample==0, clean noobs

* Inspect weights
summ samplesize if mazsample==1, det
di r(max)/r(min)
file write r _n _n "Ratio of highest to lowest weighted county" _tab %6.0fc (r(max)/r(min)) _n 
cumul samplesize [aweight=samplesize] if mazsample==1, gen(cum_sample)
destring totpop00, replace force
format totpop00 %9.0fc
gsort -mazsample +samplesize
list state_abb county_name samplesize totpop00 cum_sample in 1/10, clean noobs
gsort -mazsample -samplesize
list state_abb county_name samplesize totpop00 cum_sample in 1/10, clean noobs
count if mazsample==1
local total = r(N)
count if cum_sample>.5 & mazsample==1
file write r "50% of weight come from" _tab (r(N)) " counties" _n 

tab mazsample if state_abb=="VA"
tab mazsample [fweight=pop60] if state_abb=="VA"
summ pop60 if state_abb=="VA"
local denom = r(N)*r(mean)
summ pop60 if state_abb=="VA" & mazsample==0
file write r _n "Ommitted Virginia population" _tab %3.0f (r(N)*r(mean)/`denom'*100) "%" _n 

* Check Census variables
corr pctunemp60 pctunemp
list state_abb county_name pctunemp60 pctunemp if pctunemp==. & pctunemp60!=., clean noobs
corr pctlfag agpctemployed60 agpctlaborforce60
summ pctlfag agpctemployed60 agpctlaborforce60 if pctlfag!=.
corr pctoccupiedhousing over1perroompcthousing1960
summ pctoccupiedhousing ownerpcthousing60 over1perroompcthousing1960 if pctoccupiedhousing!=.

* Two pairs of counties 
list county_name totpop60 logtotpop1960 pctblack pop60 ln_pop60 pctblack60 ///
	if county_name=="Baltimore" | county_name=="Baltimore city", clean noobs
list county_name totpop60 logtotpop1960 pop60 ln_pop60 ///
	if (county_name=="St. Louis" | county_name=="St. Louis city") & state_abb=="MO", clean noobs
* Mazumder's totpop60 for Baltimore aggregates Baltimore and Baltimore city,
* but independent variables like logtotpop1960 pertain to Baltimore only!
* (Likewise for St Louis.)

* Check that other variables match Mazumder's 
list state_abb county_name pctblack pctblack60 if round(pctblack, .001)!=round(pctblack60, .001) & inM, clean noobs
list state_abb county_name pcturban pcturban60 if round(pcturban, .001)!=round(pcturban60, .001) & inM, clean noobs
list state_abb county_name logtotpop1960 ln_pop60 if round(logtotpop1960, .001)!=round(ln_pop60, .001) & inM, clean noobs
list state_abb county_name medincome medincome60 if round(medincome, .001)!=round(medincome60, .001) & inM, clean noobs

************************************************************************************************************************************

count if ln_protest_event==. & protest_indicator!=.
twoway histogram ln_protest_event
gen protest_event_pc = protest_event / pop60
gsort -mazsample -protest_event_pc
list state_abb county_name protest_event_pc protest_event in 1/10, clean noobs
gsort -mazsample -protest_event
list state_abb county_name protest_event in 1/10, clean noobs
file write r _n "Protest event frequency, highest values" _n
file write r %28s ("Mazumder") %7s ("Ours") _n
forvalues i = 1/10 {
	file write r %2.0f (`i') " " %-20s (county_name[`i'] + ", " + state_abb[`i']) %5.0f (protest_event[`i']) %7.0f (CR_eventcount[`i']) _n
	}
	
* Rescale variables
clonevar res = resent
replace res = (res-1) / 4 
summ res resent 
corr res resent

di "Binary variables: " _c
foreach var of varlist $allaggvars {
	summ `var'
	if r(min)!=0 | r(max)!=1  local nonbinary = "Y"
	if "`nonbinary'"!="Y" {
		levelsof `var', local(levels) 
		if "`levels'"!="0 1"  local nonbinary = "Y"
		}
	if "`nonbinary'"=="Y" {
		clonevar Z`var' = `var' 
		summ `var' if mazsample
		replace Z`var' = Z`var' / ( r(sd) * 2)
		local scale_`var' = r(sd)*2
		}
	else di "`var' " _c 
	}
di
	
gen byte max_sample_mazumder = 0
foreach dv in res aff dem {
	eststo: reg `dv' protest_indicator $Zcountycontrols i.state [aweight = samplesize_`dv'], vce(hc2)
	quietly summ `dv' if e(sample)
	local b_`dv'_protestindicator = _b[protest_indicator]
	gen byte sample_mazumder_`dv' = e(sample)
	replace max_sample_mazumder = e(sample) if max_sample_mazumder == 0
	niceoutputs using Output/ReplicationTables.xls, sheet(M1`dv') aicestat
	local AICvanilla_`dv' = r(aic)
	}
tab max_sample_mazumder

file write r _n "Partial correlations for Model 1 on racial resentment:" _n 
pcorr res protest_indicator $Zcountycontrols i.state ///
	[aweight = samplesize_res] if mazsample

matrix list r(p_corr)
local sum = 0
forvalues i = 2/6 {
	local sum = `sum' + r(p_corr)[`i',1]^2
	}
file write r "   All five control variables" _tab %4.3f (`sum') _n 
local statesum = 0
forvalues i = 8/54 {
	local statesum = `statesum' + r(p_corr)[`i',1]^2
	}
file write r "   All state intercepts" _tab %4.3f (`statesum') _n

file write r "Adding percent with college degrees"  _n
pcorr res protest_indicator $countycontrols $Zpctcolvar i.state  ///
	[aweight = samplesize_res] if mazsample	
local sum = 0
forvalues i = 2/6 {
	local sum = `sum' + r(p_corr)[`i',1]^2
	}
di "`sum'"
file write r "   Five control variables" _tab %4.3f (`sum') _n
file write r "   Percent with college degrees" _tab %4.3f (r(p_corr)[7,1]^2) _n

file write r "Adding square root of college students"  _n
pcorr res protest_indicator $countycontrols $Zstudentvars i.state  ///
	[aweight = samplesize_res] if mazsample
local sum = 0
forvalues i = 2/6 {
	local sum = `sum' + r(p_corr)[`i',1]^2
	}
di "`sum'"
file write r "   Five control variables" _tab %4.3f (`sum') _n
file write r "   Square root of college students" _tab %4.3f (r(p_corr)[7,1]^2) _n

save, replace

**********************************************************************************************************************************
**********************
* Predicting protest *
**********************

tab mazsample protest_indicator
list state_abb county_name if protest_indicator!=. & mazsample==0, clean noobs

reg lninc_CR_eventcount $countycontrols_full pctcol60 collegestudents60* i.state

* Approximate Mazumder Appendix Table 6
file write r _n "** Predicting protest" _n

reg protest_indicator $Zcountycontrols_full $Zpctcolvars $Zstudentvars i.state if mazsample, vce(hc2)
niceoutputs using Output/ReplicationTables.xls, sheet(M_PR)
getaic
local aic_noquad = r(aic)
graphcoef $studentvar_raw, t($Zstudentvars)

* Estimate effect for number of students
reg protest_indicator $Zcountycontrols_full $Zpctcolvars $studentvars i.state if mazsample, vce(hc2)
local lo = 100
local hi = 25000
foreach level in lo hi {
	local sqrt_`level' = sqrt(``level'') 
	}
margins, at($studentvar = (`sqrt_lo' `sqrt_hi')) noatlegend 
file write r "Effect of number of college students:" _n
file write r "Predicted probability at " %5.0fc (`lo') _tab %3.2f (r(table)[1,1]) _n
file write r "Predicted probability at " %5.0fc (`hi') _tab %3.2f (r(table)[1,2]) _n

************************************************************************************************************************************
***********************************
* Additional ecological variables *
***********************************

local row = 1
gen DV = ""
gen Treatment = ""
gen M1b = .
gen M1se = .
gen M1p = .
gen M2b = .
gen M2se = .
gen M2p = .
gen M2newp = .
gen N = .

foreach dv in aff res dem {
	foreach treatment in protest_indicator Zlninc_CR_eventcount Zlninc_CR_participants {
		if "`dv'"=="aff" 		replace DV = "Support for affirmative action" in `row'
		else if "`dv'"=="res" 	replace DV = "Racial resentment" in `row'
		else if "`dv'"=="dem" 	replace DV = "Identification with Democrats" in `row'
		if "`treatment'"=="protest_indicator"			replace Treatment = "Binary" in `row'
		else if "`treatment'"=="Zlninc_CR_eventcount"	replace Treatment = "Event count—logged" in `row'
		else if "`treatment'"=="Zlninc_CR_participants"	replace Treatment = "Total participants—logged" in `row'
		if "`treatment'"=="Zlninc_CR_eventcount" local suffix "E"
		else if "`treatment'"=="Zlninc_CR_participants" local suffix "P"
		reg `dv' `treatment' $Zcountycontrols i.state ///
				[aweight = samplesize_`dv'] if mazsample, vce(hc2)
		replace M1b = _b[`treatment'] in `row'
		replace M1se = _se[`treatment'] in `row'
		test `treatment'
		replace M1p = r(p) in `row'
		reg `dv' `treatment' $Zcountycontrols_full $Zpctcolvars $Zstudentvars i.state ///
				[aweight = samplesize_`dv'] if mazsample, vce(hc2)
		niceoutputs using Output/ReplicationTables.xls, sheet(M2`dv'`suffix')
		replace M2b = _b[`treatment'] in `row'
		replace M2se = _se[`treatment'] in `row'
		test `treatment'
		replace M2p = r(p) in `row'
		test $Zpctcolvars $Zstudentvars
		replace M2newp = r(p) in `row'		
		replace N = e(N) in `row'
		local row = `row' + 1

		}
	}
export excel DV-N using Output/ReplicationTables if _n<=`row', first(var) sheet(csumm, replace)

* Model 2: calculate effects using raw variables
file write r _n _n "** TABLE 1, MODEL 2 **" _n 
local dv 	"aff"
reg `dv' protest_indicator $countycontrols_full c.$pctcolvar##c.$pctcolvar $studentvars i.state ///
	[aweight = samplesize_`dv'] if mazsample, vce(hc2)
test $pctcolvar c.$pctcolvar#c.$pctcolvar
file write r "College degree pct & quadratic: p" _tab %4.3g (r(p)) _n
summ $pctcolvar if mazsample [aw=samplesize_`dv'], det
local p10 = r(p10)
local p90 = r(p90)
margins, at($pctcolvar = (`p10' `p90') ) 
file write r "Going from " %2.0f (`p10') "% to " %2.0f (`p90') "% college degree changes DV by " ///
	_tab %3.2f (r(table)[1,2] -r(table)[1,1]) _n
margins, at($pctcolvar = (5 15) ) 
file write r "Going from 5% to 15% college degree changes DV by " ///
	_tab %3.2f (r(table)[1,2] -r(table)[1,1]) _n
_pctile $pctcolvar if mazsample [aw=samplesize_`dv'], percentiles(1(1)99)
foreach x in 5 15 {
	local bestdiff = .
	forvalues i = 1(1)99 {
		local diff = abs(`x' - r(r`i')) 
		if `diff' < `bestdiff' {
			local bestdiff = `diff'
			local closest = `i'
			}
		}
	file write r (`x') "% is the `closest' percentile (difference = " %4.2f (`bestdiff') ")" _n
	}
summ $studentvar if mazsample [aw=samplesize_`dv'], det
local p10 = r(p10)
local p90 = r(p90)
margins, at($studentvar = (`p10' `p90') ) 
file write r _n "Going from " %5.0fc (`p10'^2) " to " %7.0fc (`p90'^2) " college students changes DV by " ///
	_tab %3.2f (r(table)[1,2] -r(table)[1,1]) _n
local lo = 100
local hi = 25000
foreach level in lo hi {
	local sqrt_`level' = sqrt(``level'') 
	}
margins, at($studentvar = (`sqrt_lo' `sqrt_hi')) noatlegend 
file write r "Going from " %5.0fc (`lo') " to " %7.0fc (`hi') " college students changes DV by " ///
	_tab %3.2f (r(table)[1,2] -r(table)[1,1]) _n
_pctile $studentvar_raw if mazsample [aw=samplesize_`dv'], percentiles(1(1)99)
foreach x in `lo' `hi' {
	local bestdiff = .
	forvalues i = 1(1)99 {
		local diff = abs(`x' - r(r`i')) 
		if `diff' < `bestdiff' {
			local bestdiff = `diff'
			local closest = `i'
			}
		}
	file write r (`x') " is the `closest' percentile (difference = " %4.2f (`bestdiff') ")" _n
	}

* Model 2: do all possible combinations
local dv "aff"
tryoutvars  ///
	(pctblack60 c.pctblack60##c.pctblack60) ///
	(latitude c.latitude##c.latitude) ///
	(longitude c.longitude##c.longitude) ///
	(ln_collegestudents60 sqrt_collegestudents60 c.collegestudents60##c.collegestudents60 pctcollegestudents60 c.pctcollegestudents60##c.pctcollegestudents60) ///
	(pctcol60 c.pctcol60##c.pctcol60)  ///
	using "Output/tryout_`dv'" if mazsample, ///
	command(reg) depvar(`dv') targetvar(protest_indicator) ///
	fixedvars(pcturban logtotpop1960 medincome avg_dem_vshare_pre pctlfag pctunemp60 medianage medschoolyrs pctoccupiedhousing i.state) ///
	option("vce(hc2)") weighting("[aweight = samplesize_`dv']") dropping pvar(pvalue)
file write r _n "* Trying out " (r(totalcombi)) " permutations:" _n
foreach metric in p_min p_max p_mean {
	file write r "`metric':" %6.3f (r(`metric')) _n
	}
twoway histogram pvalue, ///
	freq width(.01) start(0) ///
	color(gs8) lcolor(gs8) ylabel("") ///
	xlabel(0(.1).4,labsize(medlarge)) xtick(0(.05).45) ///
	xtitle("{it:p}-value", size(medlarge)) ///
	xscale(range(0 .45) noextend) ///
	yscale(off) ///
	title("{bf:Figure A1.} Statistical significance of Civil Rights protest under" ///
			"permutations of variables in Table 1, Model 2", size(5) color(black)) ///
	yscale(off) ///
	plotregion(margin(l=0 r=0 b=0 t=3))  graphregion(fcolor(white) lcolor(white))
graph export Output/Figure_A1_pvalues.png, as(png) replace
	
* Calculate effect for resentment
file write r _n _n "** RESENTMENT PREDICTED BY NUMBER OF PROTEST EVENTS, MODEL 2 **" _n 
reg res Zlninc_CR_eventcount $Zcountycontrols_full $Zpctcolvars $Zstudentvars i.state ///
	[aweight = samplesize_res] if mazsample, vce(hc2)
summ CR_eventcount if mazsample [aw=samplesize_res], det
local op10 = r(p10)
local op90 = r(p90)
summ Zlninc_CR_eventcount if mazsample [aw=samplesize_res], det
local p10 = r(p10)
local p90 = r(p90)
margins, at(Zlninc_CR_eventcount = (`p10' `p90') )
local change = r(table)[1,2] - r(table)[1,1] 
file write r _n "Going from " (`op10') " (10th %ile) to " (`op90') " (90th %ile) protest events changes resentment by " ///
		_tab %3.2f (`change')
summ res if mazsample [aw=samplesize_res]
file write r " (" %3.2f (`change'/r(sd)) " of s.d.)"	_n
	
************************************************************************************************************************************
************************************************************************************************************************************
**** PROCESS CCES SURVEY DATA 
************************************************************************************************************************************
************************************************************************************************************************************

local keepvars "fips zipcode affirmativeaction* partyid sex birthyear race income education religion weight"

* 2006
use Input/CCES/cces_2006_common, clear
rename v1004 fips
rename v5047 zipcode 
destring zipcode, gen(x) force
tab zipcode if x==.
destring zipcode, replace force
label list V3027
tab v3027   // 7-point scale
gen affirmativeaction = (v3027 - 1) / 6
tab v3027 affirmativeaction, miss
decode v3007, gen(partyid)
decode v2004, gen(sex)
rename v2020 birthyear
decode v2005, gen(race)
decode v2032, gen(income)
decode v2018, gen(education)
decode v2022, gen(religion)
rename v1001 weight
keep `keepvars'
gen int surveyyear = 2006
save Output/respondents, replace

* 2008
use Input/CCES/cces_2008_common, clear
gen fips = real(V251)*1000 + V269 
rename V202 zipcode 
label list LABG
tab CC313   // 4-point scale
gen affirmativeaction = (CC313 - 1) / 3
tab CC313 affirmativeaction, miss
decode CC307a, gen(partyid)
decode V208, gen(sex)
rename V207 birthyear
decode V211, gen(race)
decode V246, gen(income)
decode V213, gen(education)
decode V219, gen(religion)
rename V201 weight
keep `keepvars'
gen int surveyyear = 2008
append using Output/respondents
save Output/respondents, replace

* 2009
use Input/CCES/cces09_cmn_output_2, clear
gen fips = real(v265)*1000 + v269
rename v253 zipcode // residence 
label list cc09_55
tab cc09_55   // 4-point scale
gen affirmativeaction = (cc09_55 - 1) / 3
tab cc09_55 affirmativeaction, miss
decode cc424, gen(partyid)
decode v208, gen(sex)
rename v207 birthyear
decode v211, gen(race)
replace race = strtitle(race)
decode v246, gen(income)
decode v213, gen(education)
decode v219, gen(religion)  // Pew
rename v200 weight
keep `keepvars'
gen int surveyyear = 2009
append using Output/respondents
save Output/respondents, replace

* 2010
use Input/CCES/cces_2010_common_validated, clear
rename V277 fips
rename V202 zipcode 
label list CC327
tab CC327, miss  // 4-point scale
gen affirmativeaction = (CC327 - 1) / 3
tab CC327 affirmativeaction, miss
rename CC422a racialresentment_irish
rename CC422b racialresentment_slavery
decode V212d, gen(partyid)
decode V208, gen(sex)
rename V207 birthyear
decode V211, gen(race)
decode V246, gen(income)
decode V213, gen(education)
decode V219, gen(religion)   // Pew
label list V219
rename V101 weight
tab affirmativeaction
tab income, miss
destring fips, replace 
keep `keepvars' racialresentment*
gen int surveyyear = 2010
append using Output/respondents
save Output/respondents, replace

* 2011
use Input/CCES/CCES11_Common_OUTPUT.dta, clear
gen fips = real(V302)*1000 + V303
rename V279 zipcode 
destring zipcode, gen(x) force
tab zipcode if x==.
destring zipcode, replace force
label list CC354 
tab CC354, miss  // 4-point scale
gen affirmativeaction = (CC354 - 1) / 3
tab CC354 affirmativeaction, miss
rename CC359 racialresentment_irish // only 1!
decode V212d, gen(partyid)
decode V208, gen(sex)
rename V207 birthyear
decode V211, gen(race)
decode V246, gen(income)
decode V213, gen(education)
decode V219, gen(religion) // Pew
label list V219
rename V101 weight
destring fips, replace 
keep `keepvars' racialresentment*
gen int surveyyear = 2011
append using Output/respondents
save Output/respondents, replace

* Unify
tab race surveyyear, miss
keep if race=="White"
drop race

by fips, sort: egen Cwrespondents = total(1)
label var Cwrespondents "Number of white CCES respondents in county"
by fips, sort: egen Cwweights = total(weight)
label var Cwweights "Total weight of white CCES respondents in county"

drop if (fips>=2000 & fips<3000) | (fips>=15000 & fips<16000)

tab racialresentment_irish surveyyear [aw=weight], miss nofreq col
tab racialresentment_slavery surveyyear [aw=weight], miss nofreq col
gen byte res = .
replace res = 3 - (racialresentment_irish - racialresentment_slavery) / 2 if surveyyear==2010
replace res = 6 - racialresentment_irish if surveyyear==2011
by surveyyear, sort: summ res [aw=weight]
label var res "Agrees blacks should work like Irish; (& 2010) disagrees that slavery hinders them"

tab affirmativeaction surveyyear [aw=weight], miss nofreq col
gen byte aff = cond(affirmativeaction==., ., affirmativeaction < .5)
label var aff "Supports affirmative action (binary)"
tab aff surveyyear [aw=weight], miss col nofreq // 2008 is more supportive, Obama?!
tab affirmativeaction aff [aw=weight], miss
rename affirmativeaction affscale
replace affscale = 1 - affscale
label var affscale "Supports affirmative action (0-1)"

replace partyid = strtrim(strtitle(partyid))
tab partyid surveyyear [aw=weight], miss nofreq col
gen byte dem = cond( partyid=="", ., strpos(partyid, "Democrat")!=0 )
label var dem "Identifies with Democratic party"
tab partyid dem [aw=weight], miss nofreq col
gen byte dem_miss = strpos(partyid, "Democrat")!=0
label var dem_miss "Identifies with Democratic party (missing treated as non Democrat)"

gen byte demscale = .
replace demscale = 0   if partyid=="Strong Republican"
replace demscale = 1/6 if partyid=="Weak Republican" | partyid=="Not Very Strong Republican"
replace demscale = 2/6 if partyid=="Lean Republican"
replace demscale = 3/6 if partyid=="Independent" |  partyid=="Not Sure"
replace demscale = 4/6 if partyid=="Lean Democrat"
replace demscale = 5/6 if partyid=="Weak Democrat" | partyid=="Not Very Strong Democrat"
replace demscale = 1   if partyid=="Strong Democrat"
tab demscale partyid [aw=weight], miss nofreq col
drop partyid

replace religion = strtitle(religion)
tab religion surveyyear [aw=weight], miss nofreq col
replace religion = "Catholic" if religion == "Roman Catholic"
replace religion = "None" if religion == "Agnostic"
replace religion = "None" if religion == "Atheist"
replace religion = "None" if religion == "Nothing In Particular"
replace religion = "Other Christian" if religion == "Eastern Or Greek Orthodox"
replace religion = "Other Christian" if religion == "Mormon"
replace religion = "Some Other Religion" if religion == "Muslim"
replace religion = "Some Other Religion" if religion == "Buddhist"
replace religion = "Some Other Religion" if religion == "Hindu"
replace religion = "Some Other Religion" if religion == "Something Else"
tab religion surveyyear  [aw=weight], miss col nofreq
* Other Christianity seems to absorb some Protestant!
gen byte relig_catholic = cond(religion=="", ., religion=="Catholic")
gen byte relig_otherchristian = cond(religion=="", ., religion=="Other Christian")
gen byte relig_jewish = cond(religion=="", ., religion=="Jewish")
gen byte relig_other = cond(religion=="", ., religion=="Some Other Religion")
gen byte relig_none = cond(religion=="", ., religion=="None")
summ relig_*
encode religion, gen (religioncoded)
label var religioncoded "Religion"
drop religion

replace education = strtitle(education)
tab education surveyyear [aw=weight], miss nofreq col
label define educ_label 1 `"No Hs"' 2 `"High School Graduate"' 3 `"Some College"' 4 `"2-Year"' 5 `"4-Year"' 6 `"Post-Grad"'
encode education, gen(educat) label(educ_label)
drop education

replace income = lower(income)
tab income surveyyear  [aw=weight], miss nofreq col
gen inc = .
replace inc =  7.5 if income == "less than $10,000"
replace inc = 12.5 if income == "$10,000 - $14,999"
replace inc = 17.5 if income == "$15,000 - $19,999"
replace inc = 22.5 if income == "$20,000 - $24,999"
replace inc = 27.5 if income == "$25,000 - $29,999"
replace inc = 35   if income == "$30,000 - $39,999" 
replace inc = 45   if income == "$40,000 - $49,999"
replace inc = 55   if income == "$50,000 - $59,999" 
replace inc = 65   if income == "$60,000 - $69,999" 
replace inc = 75   if income == "$70,000 - $79,999"
replace inc = 90   if income == "$80,000 - $99,999"
replace inc = 110  if income == "$100,000 - $119,999"
replace inc = 135  if income == "$120,000 - $149,999"
replace inc = 200  if income == "$150,000 or more"

replace inc =  15 if income == "$10,000 - $19,999" 
replace inc =  25 if income == "$20,000 - $29,999" 
replace inc =  35 if income == "$30,000 - $39,999" 
replace inc =  45 if income == "$40,000 - $49,999" 
replace inc =  55 if income == "$50,000 - $59,999" 
replace inc =  65 if income == "$60,000 - $69,999" 
replace inc =  75 if income == "$70,000 - $79,999" 
replace inc =  85 if income == "$80,000 - $99,999" 
replace inc = 110 if income == "$100,000 - $119,999" 
replace inc = 135 if income == "$120,000 - $149,999" 
replace inc = 175 if income == "$150,000 - $199,999" 
replace inc = 225 if income == "$200,000 - $249,999" 
replace inc = 300 if income == "$250,000 - $349,999" 
replace inc = 425 if income == "$350,000 - $499,999" 
replace inc = 700 if income == "$500,000 or more" 

tab income inc [aw=weight], miss nofreq col
gen income_miss = inc==.
gen ln_income = ln(inc)
summ ln_income [aw=weight]
replace ln_income = r(mean) if income_miss==1
drop income
rename inc income

tab sex surveyyear [aw=weight], miss nofreq col
replace sex = lower(sex)
gen byte female = cond( sex=="female", 1, cond(sex=="male", 0, .), . )  
label var female "Female"
tab sex female [aw=weight], miss nofreq col
drop sex

gen age = surveyyear - birthyear
gen age2 = age^2
label var age2 "Age squared"

compress
save Output/respondents, replace

************************************************************************************************************************************
************************************************************************************************************************************
* INDIVIDUAL ANALYSIS
************************************************************************************************************************************
************************************************************************************************************************************

file write r _n "*****************************" _n
file write r    "**** INDIVIDUAL ANALYSIS ****" _n
file write r    "*****************************" _n

use Output/counties, clear
keep fips state_abb county_name southconfed protest_indicator protest_event ln_protest_event resent affirm dem ///
	samplesize sample_mazumder_* max_sample_mazumder ///
	$allaggvars
rename samplesize Msamplesize
rename sample_mazumder_res Mcase_res
rename sample_mazumder_aff Mcase_aff
rename sample_mazumder_dem Mcase_dem
rename resent Mres_mean
rename affirm Maff_pct
rename dem Mdem_pct
encode state_abb, gen(state_id)

merge 1:m fips using Output/respondents
sort state_abb county_name
tab fips if _merge==2
list state_abb county_name Msamplesize if _merge==1, sepby(state_abb) noobs
drop _merge
egen fips_tag = tag(fips)

by fips, sort: egen Cres_meanweighted = total(res * weight)
by fips, sort: egen denom = total(cond(res==., ., weight))
replace Cres_meanweighted = Cres_meanweighted / denom
drop denom
corr Mres_mean Cres_meanweighted if fips_tag 
summ Mres_mean Cres_meanweighted if fips_tag & Mres_mean!=.

clonevar res_unscaled = res
replace res = (res-1)/4

count if weight==0
drop if weight==0 // mixed includes observations with zero weight in the total N, but melogit does not

save Output/respondents, replace

count
file write r _n "** Total number of respondents" _tab %7.0fc (r(N)) _n
count if surveyyear==2010 |  surveyyear==2011
file write r "   2010-11" _tab %6.0fc (r(N)) _n

tab educat aff [aw=weight], row
file write r _n "% of whites supporting affirmative action:" _n

by fips, sort: egen county_degreepct = total(cond(educat>4 & educat!=., weight, 0))
by fips, sort: egen denom = total(cond(educat!=., weight, 0))
replace county_degreepct = county_degreepct / denom
corr county_degreepct pctcol60 if fips_tag [aweight=denom]
corr county_degreepct pctcol60 if fips_tag
gen degree = cond(educat!=., educat>4, .)
logistic degree pctcol60 [pweight=weight]
logistic degree pctcol60 pctcol602 [pweight=weight]

svyset [pweight = weight]
svy: tab aff degree, col
file write r "No degree" _tab %3.0f ( e(Prop)[2,1] / (e(Prop)[1,1] + e(Prop)[2,1]) *100 ) _n
file write r "Degree" _tab %3.0f    ( e(Prop)[2,2] / (e(Prop)[1,2] + e(Prop)[2,2]) *100 ) _n

svy: tab educ
file write r _n  %3.0f ( e(Prop)[1,1] *100 ) "% of respondents have no HS" _n

quietly summ pctcol60 if fips_tag, det
local p10 = r(p10)
local p90 = r(p90)
xtile q = pctcol60, n(5)
file write r _n "Proportion of whites holding college degrees by county 1960 % college degrees" _n
svy: proportion degree if pctcol60<`p10'
file write r "Bottom decile" _tab %3.0f ( e(b)[1,2] *100) _n
svy: proportion degree if pctcol60>`p90'
file write r "Top decile" _tab %3.0f ( e(b)[1,2] *100) _n

logistic degree c.pctcol60##c.pctcol60 [pweight=weight]
summ pctcol60 if fips_tag, det
margins, at( pctcol60 = (`r(p10)' `r(p90)') ) noatlegend


global individual_educ 	"ib2.educat"
global individualvars 	"$individual_educ age age2 female ln_income income_miss relig_*" //  
global Zindividualvars 	"$individual_educ Zage Zage2 female Zln_income income_miss relig_*" //  
global fixedstuff		"i.surveyyear i.state_id"
global clusterstuff		"[aw=weight], vce(cluster fips)"
global command			"reg"


local fulldvlist 		"res aff affscale dem"
foreach dv in `fulldvlist' {
*	global clusterstuff_`dv'	"$clusterstuff, pweight(samplesize_`dv')"
	global clusterstuff_`dv'	"$clusterstuff"
	di "${clusterstuff_`dv'}"
	}

* Rescale continuous and calculate county sample size
gen byte ind_maxsample = 0
label var ind_maxsample "Respondent used in one or more analyses"
gen byte ind_minsample = 1
label var ind_minsample "Respondent used in all analyses"
foreach dv in `fulldvlist' { 
	reg `dv' $individualvars $countycontrols_full $pctcolvars protest_indicator
	gen byte ind_sample_`dv' = e(sample)
	replace ind_maxsample = 1 if e(sample)
	replace ind_minsample = 0 if !e(sample)	
	by fips, sort: egen samplesize_`dv' = total(weight * e(sample))
	replace samplesize_`dv' = . if samplesize_`dv'==0
	}
count if ind_minsample
count if ind_maxsample
tab fips_tag
drop fips_tag
gsort - ind_minsample - ind_maxsample
egen fips_tag = tag(fips) 

summ Cwweights Msamplesize if fips_tag & Msamplesize!=. 
corr Cwweights Msamplesize if fips_tag
tab ind_maxsample
by fips, sort: egen county_maxsample = total(ind_maxsample)

list county_name state_abb Cwrespondents if county_maxsample==0 & max_sample_mazumder==1 & fips_tag, clean noobs
misstable summarize aff res dem educat age age2 female ln_income income_miss relig_* ///
	$countycontrols_full $pctcolvar protest_indicator if county_maxsample==0 & max_sample_mazumder==1
* Two counties each with single respondent are ommitted
count if max_sample_mazumder==1 & fips_tag==1
count if max_sample_mazumder==1
file write r _n "Respondents in Mazumder's counties:" _tab %9.0fc (r(N)) _n
local denom = r(N)
count if ind_sample_aff==1
file write r "Total respondents in analysis of support for affirmative action:" _tab %9.0fc (r(N)) _n
file write r "% missing" _tab %3.1f ( (1-(r(N)/`denom'))*100 ) _n

* Recalculate weights
by surveyyear, sort: summ weight if ind_maxsample
by surveyyear, sort: egen meanweight = mean(cond(ind_maxsample, weight, .))
clonevar oldweight = weight
replace weight = weight * (1 / meanweight)
by surveyyear, sort: summ weight oldweight if ind_maxsample

summ ln_income if ind_maxsample [aw=weight], det
replace ln_income = r(mean) if income_miss==1
local contvarlist "age age2 ln_income $allaggvars"
foreach contvar in `contvarlist' {
	clonevar Z`contvar' = `contvar'
	summ `contvar' if ind_maxsample [aw=weight]
	replace Z`contvar' = Z`contvar' / ( r(sd) * 2)
	}
foreach agevar in age {
	summ `agevar' if ind_maxsample [aw=weight], det
	local `agevar'_p10 = r(p10)
	local `agevar'_p90 = r(p90)
	}

* Redo Mazdumder's aggregate model with different weights
reg Maff_pct $countycontrols protest_indicator i.state_id [aweight = Msamplesize] if fips_tag, vce(hc2)
by fips, sort: egen county_respondents_aff = total(aff!=.)
summ county_respondents_aff if fips_tag & e(sample), det
gen sqrt_county_respondents_aff = sqrt(county_respondents_aff)
reg Maff_pct $countycontrols protest_indicator i.state_id [aweight = sqrt_county_respondents_aff] if fips_tag, vce(hc2)
niceoutputs using Output/ReplicationTables.xls, sheet(M1wsr)
file write r _n "Aggregate Model 1 weighting by root of sample size" _n
file write r "Coef for protest_indicator:" _tab %4.3f (_b[protest_indicator]) _n
test protest_indicator
file write r "p-value for protest_indicator:" _tab %4.3f (r(p))



************************************************************************************************************************************
************************************************************************************************************************************

* MAZUMDER'S ORIGINAL MODEL AT INDIVIDUAL LEVEL
capture: program drop domodel3
program domodel3, rclass
syntax varname

local dv "`varlist'"

$command `dv' $Zcountycontrols protest_indicator $fixedstuff ///
	if ind_maxsample ${clusterstuff_`dv'}
niceoutputs using Output/ReplicationTables.xls, sheet(M3`dv') aicestat

quietly capture desc Mcase_`dv'
if _rc==0	list state_abb county_name fips Cwrespondents if e(sample)==0 & Mcase_`dv'==1 & fips_tag==1, clean noobs

return scalar protesteffect = _b[protest_indicator]

end
	
************************************************************************************************************************************
* FINAL MODEL: INDIVIDUAL + COLLEGE

capture: program drop domodel4
program define domodel4, rclass
syntax varname, treatment(varname) [file(string) sheet(string) south forgraph addvars(varlist)]

local dv "`varlist'"

file write r _n _n "** DV: `dv'   Treatment: `treatment'; " 
file write r (  cond("`addvars'"!="", "Additional: `addvars'", "")  ) _n

$command `dv' $Zindividualvars $Zcountycontrols_full `treatment' $Zpctcolvars $Zstudentvars `addvars' $fixedstuff ${clusterstuff_`dv'} 
if "`file'"!="" 	niceoutputs using "`file'", sheet("`sheet'") aicestat
else 				getaic
local aicbase = r(aic)
return scalar protesteffect = _b[`treatment']

if "`south'"!="" {
	$command `dv' southconfed##c.($Zindividualvars $Zcountycontrols_full `treatment' $Zpctcolvars $Zstudentvars) $fixedstuff ${clusterstuff_`dv'}
	getaic
	local aicsouth = r(aic)
	file write r "South interaction:" _n
	file write r "  protest: coef" _tab %3.2f (_b[`treatment']) _n
	test `treatment'
	file write r "  p-value" _tab %3.2f (r(p)) _n
	test 1.southconfed#c.`treatment'	
	file write r "  P-value for protest x South interaction" _tab %3.2f (r(p)) _n
	test (1.southconfed#c.`treatment' + `treatment') = 0	
	file write r "  P-value for South protest" _tab %3.2f (r(p)) _n
	file write r "  AIC improvement" _tab %2.0f (`aicbase' - `aicsouth') _n
	}
	
if "`forgraph'"!="" {
	$command `dv' $Zindividualvars $Zcountycontrols_full `treatment' ///
			c.${pctcolvar}##c.${pctcolvar} $studentvars ///
			$fixedstuff ${clusterstuff_`dv'}
	margins, at(${pctcolvar} = (5 15) ) noatlegend
	file write r _n "Going from 5% to 15% college degree changes DV by " ///
		_tab %3.2f (r(table)[1,2] -r(table)[1,1]) _n
	local lo = 100
	local hi = 25000
	foreach level in lo hi {
		local sqrt_`level' = sqrt(``level'') 
		}
	margins, at($studentvar = (`sqrt_lo' `sqrt_hi')) noatlegend 
	file write r "Going from `lo' to `hi' college students changes DV by " ///
		_tab %3.2f (r(table)[1,2] -r(table)[1,1]) _n
	}
end
	
************************************************************************************************************************************

capture: program drop dograph
program define dograph

syntax, [suffix(string) highres]

local dv = e(depvar)

if "`highres'" == "" {
	local recastvalue 	"line"
	local plotopsvalue	"lcolor(black)"
	}
else {
	local recastvalue 	"scatter"
	local plotopsvalue	"mcolor(black) msize(vsmall)"
	}

local textsize			"4.12"	

if "`dv'" == "aff" {
	local graphtitle	"{bf:Figure 1.} Whites’ support for affirmative action, 2006–11"
	local ylabels		".2 .3 .4 .5"
	local ytitle		"Predicted probability of support"
	local graphname 	"Figure_1"
	local ycommmon		"ycommon"
	}
if "`dv'" == "res" {
	local graphtitle	"{bf:Figure A2.} Whites’ racial resentment, 2010–11"
	local ylabels		".6 .7"
	local ytitle		"Predicted resentment (0-1 scale)"
	local graphname 	"Figure_A2"
	local yscale		"yscale(range(.56 .78))"
	}
if "`dv'" == "dem" {
	local graphtitle	"{bf:Figure A3.} Whites’ partisan identity, 2006–11"
	local ylabels		".4 .5 .6"
	local ytitle		"Predicted probability of Democrat identity"
	local graphname 	"Figure_A3"
	local yscale		"yscale(range(.33 .63))"
	}

* COLLEGE STUDENTS
if "`highres'"!="" {
	quietly levelsof $studentvar if ind_maxsample, local(atvalues)
	}
else {
	quietly summ $studentvar [aw=weight] if ind_maxsample, det
	local atvalues "`r(p5)' `r(p10)' `r(p25)' `r(p50)' `r(p75)' `r(p90)' `r(p95)'"
	local mean = r(mean)
	local min = r(min)
	local max = r(max)
	local inc = (r(max) - r(min)) / 10
	forvalues x = `min'(`inc')`max' {
		local atvalues "`atvalues' `x'"
		}
	}
margins, at($studentvar=(`atvalues')) vce(unconditional) noatlegend
tempvar x prob l u
gen `x'=.
gen `prob'=.
gen `l'=.
gen `u'=.
local n = wordcount("`atvalues'") 
forvalues i = 1/`n' {
	local value : word `i' of `atvalues'
	local value = `value' ^2
	quietly replace `x' = `value' in `i'
	quietly replace `prob' = (r(table)[1, `i']) in `i'
	quietly replace `l' = (r(table)[5, `i']) in `i'
	quietly replace `u' = (r(table)[6, `i']) in `i'
	di "`value': " (`prob'[`i']) " (" (`l'[`i']) " ... " (`u'[`i']) ")"
	}
quietly summ $studentvar_raw [aw=weight] if ind_maxsample, det
forvalues p = 10(40)90 { 
	local p`p' = `r(p`p')'
	}
twoway ///
	(rarea `l' `u' `x', ///
		sort color(gs13) lcolor(gs13))	///
	(scatter `prob' `x', ///
		sort msize(vsmall) mcolor(black) ), 	///
	plotregion( margin(zero) ) ///	
	legend(off)  ///
	graphregion(fcolor(white) lcolor(white)) ///
	ytitle("") ///
	ylabel(`ylabels', labsize(`textsize') angle(horizontal)) `yscale' ///
	xlabel(0(50000)100000, format(%7.0fc) labsize(`textsize')) xmtick(0(25000)125000)   ///
	xtitle("Students in college, 1960", size(`textsize') margin(t=1 r=3) ) ///
	xline(`p10', lcolor(gs10)) xline(`p50', lcolor(gs10))  xline(`p90', lcolor(gs10)) ///
	title("") plotregion(margin(zero)) graphregion(fcolor(white) lcolor(white) margin(zero)) legend(off) ///
	name(students, replace)

* COLLEGE DEGREES
quietly summ ${pctcolvar} [aw=weight] if ind_maxsample, det
forvalues p = 10(40)90 { 
	local p`p' = `r(p`p')'
	}		
if "`highres'"!="" {
	quietly levelsof ${pctcolvar} if ind_maxsample, local(atvalues)
	}
else {
	local atvalues "`r(min)' `r(p5)' `r(p10)' `r(p25)' `r(p50)' `r(p75)' `r(p90)' `r(p95)' `r(max)'"
	local mean = r(mean)
	local max = r(max)
	local x = r(min) + 1
	while `x' < `max' {
		local atvalues "`atvalues' `x'"
		local x = `x' + 1
		}
	}
margins, at(${pctcolvar}=(`atvalues')) vce(unconditional) noatlegend

marginsplot, recast(`recastvalue') recastci(rarea) ///
	plotopts(`plotopsvalue') ciopts(color(gs13))  ///
	xlabel(0 "0%" 10 "10%" 20 "20%" , labsize(`textsize')) xmtick(0(5)25)   ///
	ytitle("") ///
	ylabel(`ylabels', labsize(`textsize') angle(horizontal)) `yscale' ///
	xtitle("Adults with 4 years of college, 1960", size(`textsize') margin(t=1 r=3) ) ///
	xline(`p10', lcolor(gs10))  xline(`p50', lcolor(gs10))  xline(`p90', lcolor(gs10)) ///
	title("") plotregion(margin(zero)) graphregion(fcolor(white) lcolor(white) margin(zero)) legend(off) ///
	name(educ, replace) 		

* PROTEST
margins, at (protest_indicator=(0 1))  vce(unconditional) noatlegend
marginsplot, recast(scatter) recastci(rcap) ///
	plotopts( mcolor(black) lcolor(black) msize(small) ) ciopts( color(black) lwidth(medthick) ) ///
	ytitle("") ///
	xscale(range(-.5 1.5)) ///
	xlabel(, labsize(`textsize'))   ///   0 "None" 1 "Protest"
	ylabel(`ylabels', labsize(`textsize') angle(horizontal))  `yscale' ///
	xtitle("Civil Rights protest, 1960–65", size(`textsize') margin(t=1)) ///
	title("") plotregion(margin(zero) lcolor(white)) ///
	graphregion(fcolor(white) lcolor(white) margin(zero)) legend(off) ///
	name(protest, replace) 			
*			fxsize(37)

graph save Temp/graph_`dv'_protest, replace
graph combine educ protest students, ` ycommon'  ///
	title("`graphtitle'", color(black) margin(b=3) size(4)) ///
	plotregion(margin(zero)) ///
	graphregion(fcolor(white) lcolor(white) margin(tiny)) imargin(1 1 4 0) /// margin(tiny) altshrink 
	l1title("`ytitle'", size(medsmall) justification(center)) ///
	xsize(6) ysize(6)
graph export Output/`graphname'_`dv'`highres'.png, as(png) replace
graph save Temp/graph_`dv', replace

		
end	

************************************************************************************************************************************
************************************************************************************************************************************

* Call programs

** Table 3, Model 3 **
domodel3 aff

** Table 3, Model 4
domodel4 aff, treatment(protest_indicator) file("Output/ReplicationTables") sheet("M4aff") south forgraph
if "$longtime"=="" 	dograph
else			 	dograph, highres

** Table 4
local row = 1
gen DV = ""
gen Treatment = ""
gen M3b = .
gen M3se = .
gen M3p = .
gen M4b = .
gen M4se = .
gen M4p = .
gen M4newp = .
gen N = .

foreach dv in aff res dem {
	foreach treatment in protest_indicator Zlninc_CR_eventcount Zlninc_CR_participants {
		if "`dv'"=="aff" 		replace DV = "Support for affirmative action" in `row'
		else if "`dv'"=="res" 	replace DV = "Racial resentment" in `row'
		else if "`dv'"=="dem" 	replace DV = "Identification with Democrats" in `row'
		if "`treatment'"=="protest_indicator"			replace Treatment = "Binary" in `row'
		else if "`treatment'"=="Zlninc_CR_eventcount"	replace Treatment = "Event count—logged" in `row'
		else if "`treatment'"=="Zlninc_CR_participants"	replace Treatment = "Total participants—logged" in `row'
		if "`treatment'"=="Zlninc_CR_eventcount" local suffix "E"
		else if "`treatment'"=="Zlninc_CR_participants" local suffix "P"
		reg `dv' $Zindividualvars $Zcountycontrols `treatment' ///
			$fixedstuff ${clusterstuff_`dv'} 
		replace M3b = _b[`treatment'] in `row'
		replace M3se = _se[`treatment'] in `row'
		test `treatment'
		replace M3p = r(p) in `row'
		reg `dv' $Zindividualvars $Zcountycontrols_full `treatment' $Zpctcolvars $Zstudentvars ///
			$fixedstuff ${clusterstuff_`dv'} 
		replace M4b = _b[`treatment'] in `row'
		replace M4se = _se[`treatment'] in `row'
		test `treatment'
		replace M4p = r(p) in `row'
		test $Zpctcolvars $Zstudentvars
		replace M4newp = r(p) in `row'		
		replace N = e(N) in `row'
		local row = `row' + 1
		}
	}
export excel DV-N using Output/ReplicationTables if _n<=`row', first(var) sheet(rsumm, replace)

* Appendix graphs
foreach dv in res dem {
	domodel4 `dv', treatment(protest_indicator) file("Output/ReplicationTables") sheet("M4`dv'") forgraph
	if "$longtime"=="" 	dograph
	else			 	dograph, highres
	}

*************************************

file close r
log close
